library(dplyr)
# Player-level match data from the local fbref export. The result is not
# assigned, so this call only displays the table.
read.csv(file = "data/fbref/games_players.csv")

# Club match data, read straight from the FiveThirtyEight soccer API CSV.
matchdat <- read.csv(
  file = "https://projects.fivethirtyeight.com/soccer-api/club/spi_matches.csv"
)
## Calculating the league-wide average performances

# Mean xG of the home side (xg1) and of the away side (xg2) across every
# match of league 2411. Missing xG values are dropped from the means.
homeav <- matchdat %>%
  filter(league_id == 2411) %>%
  summarise(
    avfor = mean(xg1, na.rm = TRUE),
    avag = mean(xg2, na.rm = TRUE)
  )

# Extract the two averages as plain numbers. Columns are pulled by name so
# the values stay correct even if the summarise() column order changes.
homfor <- homeav[["avfor"]] # league average xG of the home side
homeag <- homeav[["avag"]] # league average xG of the away side
# Comparing each team's performance to the league average for xG for and
# against. In both tables a positive "xgf*" value means the team creates more
# xG than average and a positive "xga*" value means it concedes more.

# Home ratings: team1 is the home side, so xg1 is the xG it creates and xg2
# is the xG it concedes.
home_rating_data <- matchdat %>%
  filter(league_id == 2411) %>%
  group_by(team1) %>%
  summarise(
    avfor = mean(xg1, na.rm = TRUE),
    avag = mean(xg2, na.rm = TRUE)
  ) %>%
  mutate(xgfh = avfor - homfor, xgah = avag - homeag) %>%
  select(team1, xgfh, xgah)

# Away ratings: team2 is the away side, so the roles of xg1 and xg2 flip.
# The xG it creates is xg2 (baseline: league average away xG, homeag) and
# the xG it concedes is xg1 (baseline: league average home xG, homfor).
# Computing these from the home side's perspective would make xgaa an
# attacking measure, and "home attack + away defence" (xgfh + xgaa) would
# silently become "home attack + away attack".
away_rating_data <- matchdat %>%
  filter(league_id == 2411) %>%
  group_by(team2) %>%
  summarise(
    avfor = mean(xg2, na.rm = TRUE),
    avag = mean(xg1, na.rm = TRUE)
  ) %>%
  mutate(xgfa = avfor - homeag, xgaa = avag - homfor) %>%
  select(team2, xgfa, xgaa)
## Building one row per match with the final scores and the two xG deltas

# Each league-2411 match gets the home team's ratings (joined on team1) and
# the away team's ratings (joined on team2). deltafh combines xgfh with xgaa
# and deltagh combines xgah with xgfa; only season, both scores and the two
# deltas are kept.
matches <- filter(matchdat, league_id == 2411) %>%
  left_join(home_rating_data, by = "team1") %>%
  left_join(away_rating_data, by = "team2") %>%
  transmute(
    season,
    score1,
    score2,
    deltafh = xgfh + xgaa,
    deltagh = xgah + xgfa
  )

matches
## Splitting it up for home and away: one row per side per match

# Columns are renamed by name inside select() rather than by position, so a
# change in column order cannot mislabel score and delta.
mat1 <- matches %>%
  select(season, score = score1, delta = deltafh) %>%
  mutate(loc = "home")

mat2 <- matches %>%
  select(season, score = score2, delta = deltagh) %>%
  mutate(loc = "away")

## Putting it together and creating the score as a factor

# scorecat is the goal count as a category from "0" to "5", where "5" also
# absorbs every score above 5.
# loc is converted to a factor with fixed levels before any modelling: the
# randomForest engine rejects predictor factors whose levels it did not see
# at fit time ("New factor levels not present in the training data"), and a
# character predictor can trigger that at predict time.
matall <- bind_rows(mat1, mat2) %>%
  mutate(
    loc = factor(loc, levels = c("home", "away")),
    scorecat = as.factor(if_else(score > 5, "5", as.character(score)))
  ) %>%
  select(-score, -season) # data for calculating the chance of goals scored

matall
library(tidymodels)
## Splitting the data into training and testing

# Both the split and the forest draw random numbers. Fixing the seed makes
# the partition, the fitted model and its printed error rates reproducible
# between runs. The seed value itself is arbitrary.
set.seed(2411)

# 90% of the rows go to training, stratified on scorecat so each score
# category is represented proportionally in both parts.
score_split <- initial_split(matall, prop = 0.9, strata = scorecat)
score_train <- training(score_split)
score_test <- testing(score_split)

## Creating the classification random forest

# Predict scorecat from every other column of the training data.
rand1 <- rand_forest() %>% # type of model
  set_engine("randomForest") %>% # engine used to fit the model
  set_mode("classification") %>%
  fit(scorecat ~ ., data = score_train)

rand1
parsnip model object
Call:
randomForest(x = maybe_data_frame(x), y = y)
Type of random forest: classification
Number of trees: 500
No. of variables tried at each split: 1
OOB estimate of error rate: 65.83%
Confusion matrix:
0 1 2 3 4 5 class.error
0 230 934 0 0 0 0 0.8024055
1 169 1172 0 0 0 0 0.1260254
2 117 775 0 0 0 0 1.0000000
3 61 386 0 0 0 0 1.0000000
4 21 148 0 0 0 0 1.0000000
5 9 81 0 0 0 0 1.0000000
### Filtering the matches to test the model on: league 2411, season 2020

# Same joins and deltas as the full match table, restricted to one season.
# Filtering before the joins gives the same rows because left_join() keeps
# every row of the left table in its original order.
matches_test <- matchdat %>%
  filter(league_id == 2411, season == 2020) %>%
  left_join(home_rating_data, by = "team1") %>%
  left_join(away_rating_data, by = "team2") %>%
  mutate(deltafh = xgfh + xgaa, deltagh = xgah + xgfa)

# One row per side per match, with columns renamed by name in select().
match20_h <- matches_test %>%
  select(delta = deltafh, score = score1) %>%
  mutate(loc = "home")

match20_a <- matches_test %>%
  select(delta = deltagh, score = score2) %>%
  mutate(loc = "away")

# Stack the home and away rows exactly once. Binding the stacked table to
# itself again would duplicate every observation.
# scorecat is the goal count as a category from "0" to "5", where "5" also
# absorbs every score above 5.
match20 <- bind_rows(match20_h, match20_a) %>%
  mutate(
    scorecat = as.factor(if_else(score > 5, "5", as.character(score)))
  ) %>%
  select(-score) # data for calculating the chance of goals scored

match20
library(caret)
# Make sure the outcome column of the held-out rows is a factor.
score_test$scorecat <- as.factor(score_test$scorecat)

# Class probabilities for every held-out row from the fitted forest.
# NOTE(review): the recorded notebook output shows this call failing with
# "New factor levels not present in the training data". Presumably the
# character predictor loc is the cause; confirm it is a factor with the same
# levels in both the training and the test data.
x <- predict(rand1, score_test, type = "prob")
Error in predict.randomForest(object = object$fit, newdata = new_data, :
New factor levels not present in the training data
library(fplscrapR)
# Game list for season 20, fetched through fplscrapR.
fix20 <- get_game_list(season = 20)

# Player details for seasons 18, 19 and 20, one table per season.
season18 <- get_player_details(season = 18)
season19 <- get_player_details(season = 19)
season20 <- get_player_details(season = 20)

# Display season 20 with playername moved in front of the fixture column.
# The reordered table is not stored.
relocate(season20, playername, .before = fixture)

# Display the local fbref player-level match data. The result is not stored.
read.csv(file = "data/fbref/games_players.csv")
NA
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKCmBgYHtyfQpsaWJyYXJ5KGRwbHlyKQptYXRjaGRhdCA8LSByZWFkLmNzdigiaHR0cHM6Ly9wcm9qZWN0cy5maXZldGhpcnR5ZWlnaHQuY29tL3NvY2Nlci1hcGkvY2x1Yi9zcGlfbWF0Y2hlcy5jc3YiKQpgYGAKCmBgYCB7cn0KIyMgY2FsYWN1bGF0aW5nIHdoYXQgdGhlIGF2ZXJhZ2UgcGVyZm9ybWFuY2VzIAoKaG9tZWF2IDwtIG1hdGNoZGF0ICU+JSBmaWx0ZXIobGVhZ3VlX2lkID09IDI0MTEpICU+JSAjd29ya2luZyBvdXQgdGhlIGF2ZXJhZ2UgaG9tZSB4ZyAKICAgICAgICAgICAgICAgICAgICAgICAgICBzdW1tYXJpc2UoYXZmb3IgPSBtZWFuKHhnMSwgbmEucm0gPSBUKSwgYXZhZyA9IG1lYW4oeGcyLCBuYS5ybSA9IFQpKSAKCgoKaG9tZm9yIDwtIGhvbWVhdltbMV1dICMgZXh0cmFjdGluZyB0aGUgbnVtYmVyIGZvciBob21lIGFuZCBhd2F5CgoKaG9tZWFnIDwtIGhvbWVhdltbMl1dCgoKIyBjb21wYXJpbmcgZWFjaCB0ZWFtcyBwZXJmb3JtYW5jZSB0byBhdmVyYWdlIGZvciB4ZyBmb3IgYW5kIGFnYWluc3QKaG9tZV9yYXRpbmdfZGF0YSA8LSBtYXRjaGRhdCAlPiUgZmlsdGVyKGxlYWd1ZV9pZCA9PSAyNDExKSAlPiUKICAgICAgICAgICAgICAgICAgICAgICAgZ3JvdXBfYnkodGVhbTEpICU+JQogICAgICAgICAgICAgICAgICAgICAgICBzdW1tYXJpc2UoYXZmb3IgPSBtZWFuKHhnMSwgbmEucm0gPSBUKSwgYXZhZyA9IG1lYW4oeGcyLCBuYS5ybSA9IFQpKSAlPiUgCiAgICAgICAgICAgICAgICAgICAgICAgIG11dGF0ZSh4Z2ZoID0gYXZmb3IgLSBob21mb3IsIHhnYWggPSBhdmFnIC0gaG9tZWFnKSAlPiUKICAgICAgICAgICAgICAgICAgICAgICAgc2VsZWN0KHRlYW0xLCB4Z2ZoLCB4Z2FoKQoKYXdheV9yYXRpbmdfZGF0YSA8LSBtYXRjaGRhdCAlPiUgZmlsdGVyKGxlYWd1ZV9pZCA9PSAyNDExKSAlPiUKICAgICAgICAgICAgICAgICAgICAgICAgZ3JvdXBfYnkodGVhbTIpICU+JQogICAgICAgICAgICAgICAgICAgICAgICBzdW1tYXJpc2UoYXZmb3IgPSBtZWFuKHhnMSwgbmEucm0gPSBUKSwgYXZhZyA9IG1lYW4oeGcyLCBuYS5ybSA9IFQpKSAlPiUgCiAgICAgICAgICAgICAgICAgICAgICAgIG11dGF0ZSh4Z2ZhID0gYXZmb3IgLSBob21mb3IsIHhnYWEgPSBhdmFnIC0gaG9tZWFnKSAlPiUKICAgICAgICAgICAgICAgICAgICAgICAgc2VsZWN0KHRlYW0yLCB4Z2ZhLCB4Z2FhKQoKIyMgY3JlYXRpbmcgYSBkYXRhIGZyYW1lIHdpdGggbWF0Y2hlcyB0aGUgZm9yIGFuZCBhZ2FpbnN0IHNjb3JlcyBhbmQgdGhlIHhnIGRlbHRhcyAKbWF0Y2hlcyA8LSBtYXRjaGRhdCAlPiUgZmlsdGVyKGxlYWd1ZV9pZCA9PSAyNDExKSAlPiUKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgbGVmdF9qb2luKGhvbWVfcmF0aW5nX2RhdGEsIGJ5ID0gInRlYW0xIikgJT4lCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGxlZnRfam9pbihh
d2F5X3JhdGluZ19kYXRhLCBieSA9ICJ0ZWFtMiIpICU+JQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICBtdXRhdGUoZGVsdGFmaCA9ICB4Z2ZoICsgeGdhYSwgZGVsdGFnaCA9IHhnYWgreGdmYSkgJT4lCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHNlbGVjdChzZWFzb24sIHNjb3JlMSwgc2NvcmUyLCBkZWx0YWZoLCBkZWx0YWdoKQoKbWF0Y2hlcwpgYGAKCmBgYCB7cn0KIyMgc3BsaXR0aW5nIGl0IHVwIGZvciBob21lIGFuZCBhd2F5CgptYXQxIDwtIG1hdGNoZXMgJT4lIHNlbGVjdChzZWFzb24sIHNjb3JlMSwgZGVsdGFmaCkgJT4lCiAgICAgICAgICAgICAgICAgICAgICBtdXRhdGUobG9jID0gImhvbWUiKQpjb2xuYW1lcyhtYXQxKVsyXSA8LSAic2NvcmUiCmNvbG5hbWVzKG1hdDEpWzNdIDwtICJkZWx0YSIKbWF0MiA8LSBtYXRjaGVzICU+JSBzZWxlY3Qoc2Vhc29uLCBzY29yZTIsIGRlbHRhZ2gpICU+JQogICAgICAgICAgICAgICAgICAgICAgbXV0YXRlKGxvYyA9ICJhd2F5IikKY29sbmFtZXMobWF0MilbMl0gPC0gInNjb3JlIgpjb2xuYW1lcyhtYXQyKVszXSA8LSAiZGVsdGEiCgoKIyMgcHV0dGluZyBpdCB0b2dldGhlciBhbmQgY3JlYXRpbmcgdGhlIHNjb3JlIGFzIGEgZmFjdG9yCm1hdGFsbCA8LSBtYXQxICU+JSBiaW5kX3Jvd3MobWF0MikgJT4lCiAgICAgICAgICAgICAgICAgICBtdXRhdGUoc2NvcmVjYXQgPSBhcy5mYWN0b3IoaWZfZWxzZShzY29yZSA+IDUsIjUiLCBhcy5jaGFyYWN0ZXIoc2NvcmUpKSkpICU+JSAjIyMgcmF0aW5nIGZyb20gMCB0byA1LiA1IGJlaW5nIGJpZ2dlc3Qgc2NvcmUKICAgICAgICAgICAgICAgICAgIHNlbGVjdCgtc2NvcmUsIC1zZWFzb24pICMjIyBkYXRhIGZvciBjYWxjdWxhdGluZyB0aGUgY2hhbmNlIG9mIGdvYWxzIHNjb3JlZCAKCm1hdGFsbApgYGAKYGBgIHtyfQpsaWJyYXJ5KHRpZHltb2RlbHMpCgojI3NwbGl0dGluZyB0aGUgZGF0YSBpbnRvIHRyYWluaW5nIGFuZCB0ZXN0aW5nCnNjb3JlX3NwbGl0IDwtIGluaXRpYWxfc3BsaXQobWF0YWxsLCBwcm9wID0gMC45LCBzdHJhdGEgPSBzY29yZWNhdCkgCnNjb3JlX3RyYWluIDwtIHRyYWluaW5nKHNjb3JlX3NwbGl0KQpzY29yZV90ZXN0IDwtIHRlc3Rpbmcoc2NvcmVfc3BsaXQpCgojIyBjcmVhdGluZyB0aGUgY2xhc3NpZmljYXRpb24gcmFuZG9tIGZvcmVzdApyYW5kMSA8LSByYW5kX2ZvcmVzdCgpICU+JSAjdHlwZSBvZiBtb2RlbCAKICAgICAgICAgICAgICAgIHNldF9lbmdpbmUoInJhbmRvbUZvcmVzdCIpICU+JSAjICBlbmdpbmUgdXNlZCB0byBmaXQgdGhlIG1vZGVsCiAgICAgICAgICAgICAgICAgIHNldF9tb2RlKCJjbGFzc2lmaWNhdGlvbiIpICU+JQogICAgICAgICAgICAgICAgICBmaXQoc2NvcmVjYXQgfi4sIGRhdGEgPSBzY29yZV90cmFpbikKYGBgCgpgYGAge3J9CiMjIyByYW5kb20gZm9yZXN0IG91dHB1dAoKcmFuZDEKYGBgCgpgYGAge3J9CiMjIyBmaWx0ZXJpbmcgdGhlIG1hdGNoZXMgdG8g
dGVzdCB0aGUgbW9kZWwgb24gCgptYXRjaGVzX3Rlc3QgPC0gbWF0Y2hkYXQgJT4lIGZpbHRlcihsZWFndWVfaWQgPT0gMjQxMSkgJT4lCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGxlZnRfam9pbihob21lX3JhdGluZ19kYXRhLCBieSA9ICJ0ZWFtMSIpICU+JQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgbGVmdF9qb2luKGF3YXlfcmF0aW5nX2RhdGEsIGJ5ID0gInRlYW0yIikgJT4lCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBtdXRhdGUoZGVsdGFmaCA9ICB4Z2ZoICsgeGdhYSwgZGVsdGFnaCA9IHhnYWgreGdmYSkgJT4lCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGZpbHRlcihzZWFzb24gPT0gMjAyMCkKCm1hdGNoMjBfaCA8LSBtYXRjaGVzX3Rlc3QgJT4lIHNlbGVjdChkZWx0YWZoLCBzY29yZTEpICU+JSAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBtdXRhdGUobG9jID0gImhvbWUiKQoKbWF0Y2gyMF9hIDwtIG1hdGNoZXNfdGVzdCAlPiUgc2VsZWN0KGRlbHRhZ2gsIHNjb3JlMikgJT4lCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgbXV0YXRlKGxvYyA9ICJhd2F5IikKCmNvbG5hbWVzKG1hdGNoMjBfaClbMV0gPC0gImRlbHRhIgpjb2xuYW1lcyhtYXRjaDIwX2gpWzJdIDwtICJzY29yZSIKCmNvbG5hbWVzKG1hdGNoMjBfYSlbMV0gPC0gImRlbHRhIgpjb2xuYW1lcyhtYXRjaDIwX2EpWzJdIDwtICJzY29yZSIKCm1hdGNoMjAgPC0gbWF0Y2gyMF9oICU+JSBiaW5kX3Jvd3MobWF0Y2gyMF9hKQoKbWF0Y2gyMCA8LSBtYXRjaDIwICU+JSBiaW5kX3Jvd3MobWF0Y2gyMCkgJT4lCiAgICAgICAgICAgICAgICAgICBtdXRhdGUoc2NvcmVjYXQgPSBhcy5mYWN0b3IoaWZfZWxzZShzY29yZSA+IDUsIjUiLCBhcy5jaGFyYWN0ZXIoc2NvcmUpKSkpICU+JSAjIyMgcmF0aW5nIGZyb20gMCB0byA1LiA1IGJlaW5nIGJpZ2dlc3Qgc2NvcmUKICAgICAgICAgICAgICAgICAgIHNlbGVjdCgtc2NvcmUpICMjIyBkYXRhIGZvciBjYWxjdWxhdGluZyB0aGUgY2hhbmNlIG9mIGdvYWxzIHNjb3JlZCAKCm1hdGNoMjAKYGBgCgpgYGAge3J9CmxpYnJhcnkoY2FyZXQpCnNjb3JlX3Rlc3Qkc2NvcmVjYXQgPC0gYXMuZmFjdG9yKHNjb3JlX3Rlc3Qkc2NvcmVjYXQpCnggPC0gcHJlZGljdChyYW5kMSwgc2NvcmVfdGVzdCwgdHlwZSA9ICJwcm9iIikKCm1hdGNoMjBfcHJlZCAgPC0gbWF0Y2gyMCAlPiUgYmluZF9jb2xzKHgpCm1hdGNoMjBfcHJlZApgYGAKCmBgYCB7cn0KbGlicmFyeShmcGxzY3JhcFIpCmZpeDIwIDwtIGdldF9nYW1lX2xpc3Qoc2Vhc29uID0gMjApCgpzZWFzb24xOCA8LSBnZXRfcGxheWVyX2RldGFpbHMoc2Vhc29uID0gMTgpCnNlYXNvbjE5IDwtIGdldF9wbGF5ZXJfZGV0YWlscyhzZWFzb24gPSAxOSkKc2Vhc29uMjAgPC0gZ2V0X3BsYXllcl9kZXRhaWxzKHNlYXNvbiA9IDIwKQpgYGAKCmBgYCB7cn0Kc2Vhc29uMjAgJT4lIHJlbG9jYXRlKHBs
YXllcm5hbWUsIC5iZWZvcmUgPSBmaXh0dXJlKQoKcmVhZC5jc3YoImRhdGEvZmJyZWYvZ2FtZXNfcGxheWVycy5jc3YiKQoKYGBg